#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author：TanDabao
# CreateTime：2021/10/21 14:38
'''
Download free résumé templates from the chinaz.com material site
(站长素材, https://sc.chinaz.com/jianli/).
'''

import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree

# Directory that receives the downloaded archives.
dirName = 'Temporary'
# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(dirName, exist_ok=True)

# Listing pages 2..N follow this pattern; page 1 has a dedicated URL.
url = 'https://sc.chinaz.com/jianli/free_%d.html'
first_page_url = 'https://sc.chinaz.com/jianli/free.html'

# Request settings are loop-invariant, so they are built once here.
# NOTE(review): both proxy entries are None, presumably to bypass any
# system/environment proxy -- confirm this is still wanted.
proxies = {"http": None, "https": None}
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
}
# Seconds to wait for the server; without a timeout a stalled connection
# would block the script indefinitely.
request_timeout = 15

# Characters that are path separators or are rejected in Windows file names.
_unsafe_filename_chars = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _fetch(session, target):
    """Return the response for a GET of *target*.

    Raises requests.RequestException on connection problems, timeouts and
    HTTP error statuses (4xx/5xx).
    """
    response = session.get(
        target, proxies=proxies, headers=headers, timeout=request_timeout
    )
    response.raise_for_status()
    return response


def _fetch_tree(session, target):
    """Return the parsed HTML tree of *target*, or None for an empty body.

    Raises requests.RequestException like _fetch().
    """
    response = _fetch(session, target)
    # Force the encoding through requests to avoid garbled Chinese text;
    # if utf-8 ever produces mojibake, try gbk instead.
    response.encoding = 'utf-8'
    html = response.text
    if not html.strip():
        return None
    return etree.HTML(html)


def _safe_filename(title):
    """Return *title* with characters that are unsafe in file names replaced."""
    return _unsafe_filename_chars.sub('_', title).strip()


# One session reuses the underlying connection across all requests.
with requests.Session() as session:
    for page in range(1, 6):    # crawl the first 5 listing pages
        new_url = first_page_url if page == 1 else url % page

        try:
            tree = _fetch_tree(session, new_url)
        except requests.RequestException as exc:
            # A broken listing page should not abort the remaining pages.
            print(new_url, '\t失败:', exc)
            continue
        if tree is None:
            print(new_url, '\t失败: empty page')
            continue

        for son in tree.xpath('//*[@id="container"]/div/a'):
            title = son.xpath('./img/@alt')
            details = son.xpath('./@href')
            if not title or not details:
                # Anchor without a thumbnail title or a link: not a template entry.
                continue

            name = title[0].strip()
            # urljoin copes with protocol-relative, path-relative and absolute
            # hrefs alike, instead of blindly prefixing 'https:'.
            details_url = urljoin(new_url, details[0]) + '#down'
            rarPath = os.path.join(dirName, _safe_filename(name) + '.rar')

            try:
                son_tree = _fetch_tree(session, details_url)
                if son_tree is None:
                    links = []
                else:
                    links = son_tree.xpath('//*[@id="down"]/div[2]/ul/li[1]/a/@href')
                if not links:
                    print(name, '\t失败: download link not found')
                    continue

                download_url = urljoin(details_url, links[0])
                result = _fetch(session, download_url).content

                # The file is written only after the whole archive has been
                # received, so a failed download leaves no partial file behind.
                with open(rarPath, 'wb') as f:
                    f.write(result)
            except (requests.RequestException, OSError) as exc:
                # Skip this template and keep going with the next one.
                print(name, '\t失败:', exc)
                continue

            print(name, '\t完成')